#INTRO: LOADING PACKAGES ETC----
library(foreign)
library(reshape2)
library(data.table)
library(stargazer)
library(MatchIt)
#library(nonrandom)
library(ggplot2)
#library(gdata1)
library(gmodels)
library(gridExtra)
library(haven)
library(plyr)
#library(car)
#library(xlsx)
library(tidyr)
library(reshape)
library(countrycode)
#library(dGlyr)
library(readr)
library(gnm)
library(MNP)
library(nls2)
library(nlstools)
library(stats)
library(dplyr)
library(matrixStats)
library(miceadds)
library(minpack.lm)
library(lfe)
library(lubridate)
library(AER)
library(reshape)
library(reshape2)
library(tictoc)
library(xtable)
library(Formula)
library(mlogit)
library(plm)
library(survival)
library(plotly)
library(alpaca)
library(lmtest)
library(sandwich)
# library(mnlogit)

library(foreach)#parallel foreach
library(iterators)
library(parallel)
library(doParallel)

library(msm)
library(tikzDevice)

library(ggpubr)#combine ggplot graphs together

library(estimatr)#lm_robust

library(clubSandwich)#coef_test

library(sjlabelled)#labelled variables (set_labels)

library(openxlsx)#read.xlsx

library(labelled)#change labels to columns

library(texreg)#extract for lm.cluster

options(width=2000)#set max characters shown
options(max.print=200000)#set max rows shown console

# !diagnostics suppress=Dsvr,Dgdm

rm(list=ls())
cat("\014")

#READ DATA:----
load("d29.RData")


#ASSIGN WEIGHTS:----

#assign Weight Country:
#calculate country (weighted by country population) weights:
# de = de %>% group_by(Es) %>% mutate(Resp = n()/6 )#number of respondent by Es
de$Pop=NA#country population in millions
de$Pop[de$Ec=="AUS"]=25
de$Pop[de$Ec=="AUT"]=9
de$Pop[de$Ec=="CAN"]=37
de$Pop[de$Ec=="DEU"]=68#West Germany (84 All Germany)
de$Pop[de$Ec=="DNK"]=6
de$Pop[de$Ec=="ESP"]=47
de$Pop[de$Ec=="FIN"]=6
de$Pop[de$Ec=="GBR"]=61
de$Pop[de$Ec=="GRC"]=10
de$Pop[de$Ec=="IRL"]=5
de$Pop[de$Ec=="ISL"]=1
de$Pop[de$Ec=="ISR"]=9
de$Pop[de$Ec=="ITA"]=61
de$Pop[de$Ec=="NLD"]=17
de$Pop[de$Ec=="NOR"]=5
de$Pop[de$Ec=="NZL"]=5
de$Pop[de$Ec=="PRT"]=10
de$Pop[de$Ec=="SWE"]=10
de$Rwp=de$Pop#Respondent Weight Country
de$Rwp=de$Rwp/mean(de$Rwp)#set mean weight == 1

#calculate country weights (weights s.t. all elections same number of respondents):
de = de %>% group_by(Es) %>% mutate(Resp = n()/6 )#number of respondent by Es
de$Rwc=1000/de$Resp#Respondent Weight Country
de$Rwc=de$Rwc/mean(de$Rwc)#set mean weight == 1
summary(de$Rwc)

#set demographic weights to average 1 by election:
# (since I deleted respondents w/o Va,Pl,Ll, they unbalance the weights for that election
# in a way that is election specific, in this way I reset sum weghts to be 1 by election)
de = de %>% group_by(Es) %>% mutate(Rwd=Rwd/mean(Rwd))
summary(de$Rwd)

#calculate country demographic weights:
# NB: this is equivalent to CSES Dataset Weight:
# | The derivative "Dataset Weight" (D1014) has been created so
# | that each election study in the dataset will contribute
# | equally to analyses of respondents, regardless of the number
# | of interviews in each election study.
de$Rwcd=de$Rwc*de$Rwd#Respondent Weight Country and Demographic
de$Rwcd=de$Rwcd/mean(de$Rwcd,na.rm=T)#set mean weight == 1
summary(de$Rwcd)
max(de$Rwcd)/min(de$Rwcd)

de$Rwcp=de$Rwc*de$Rwp
de$Rwcp=de$Rwcp/mean(de$Rwcp,na.rm=T)#set mean weight == 1

de$Rwcdp=de$Rwc*de$Rwd*de$Rwp
de$Rwcdp=de$Rwcdp/mean(de$Rwcdp,na.rm=T)#set mean weight == 1

de=de[,-which(names(de) %in% c("Resp","Pop"))]#remove working columns


#CALCULATE Pl,Ll BY ELECTION:----
reg=de %>% group_by(Es) %>% summarise(mod = list(summary(clogit(Va ~ Pl+Ll +strata(Esalt), robust=T, weights=Rwcd, method="efron"))$coefficients[,1]))#create Pl and Ll by Es
length(unlist(reg))#366
Es=unlist(reg)[1:122]
Pl=unlist(reg)[123:366]
Pl=Pl[seq(1,244,2)]
Ll=unlist(reg)[123:366]
Ll=Ll[seq(2,244,2)]
ds=data.frame(Es,Pl,Ll)
ds$Pl=as.numeric(as.character(ds$Pl))
ds$Ll=as.numeric(as.character(ds$Ll))
ds$PlLl=ds$Pl-ds$Ll
ds$Ey=substr(ds$Es,5,8)
ds$Ey=as.numeric(ds$Ey)
ds$Ec=substr(ds$Es,1,3)#
ds$time=ds$Ey-1960
rm(reg,Es,Pl,Ll)

#merge Ed:
dss=de[,c(3,4)]
dss=dss %>% group_by(Es) %>% filter(row_number (Es) == 1)
ds=merge(ds,dss,by="Es")
ds$Ed=as.factor(ds$Ed)#(for graphs)

#merge NP:
dss=de[,c(3,7)]
dss=dss %>% group_by(Es) %>% filter(row_number (Es) == 1)
ds=merge(ds,dss,by="Es")
ds$NP=as.factor(ds$NP)#(for graphs)


#BASE FOR COUNTRY SLOPE FE:----
#Ec:Pl FE:
for (i1 in 1:length(sort(unique(de$Ec)))) {#creating dummy variables Ec:Pl (for FE)
  de[[paste0("Pl_",sort(unique(de$Ec))[i1])]]=ifelse(de$Ec==sort(unique(de$Ec))[i1],de$Pl,0)#de$Pl
}
#Ec:Ll FE:
for (i1 in 1:length(sort(unique(de$Ec)))) {#creating dummy variables Ec:Ll (for FE)
  de[[paste0("Ll_",sort(unique(de$Ec))[i1])]]=ifelse(de$Ec==sort(unique(de$Ec))[i1],de$Ll,0)#de$Ll
}
#create text for regression function:
txt=""
for (i1 in 1:length(sort(unique(de$Ec)))) {#creating function text for dummy variables Es:alt except for alt=1
  txt=paste(txt,paste0("Pl_",sort(unique(de$Ec))[i1]),sep="+")
  txt=paste(txt,paste0("Ll_",sort(unique(de$Ec))[i1]),sep="+")
}


#ALTERNATIVE SPECIFICATIONS SPLINE TWO PERIODS:----

#optimization:
cores=detectCores()
cl=makeCluster(cores[1])
registerDoParallel(cl)#set parallel computing clusters
TRses=foreach (t = 1:59, .packages='survival', .combine='c') %dopar% {#getting threshold (via max LogLik):
  de$time1=pmin(de$time,t)
  de$time2=pmax(t,de$time)
  CLC_ses_=clogit(Va ~ Pl:time1+Ll:time1+Pl:time2+Ll:time2
                     +Pl_AUS+Ll_AUS+Pl_AUT+Ll_AUT+Pl_CAN+Ll_CAN+Pl_DEU+Ll_DEU+Pl_DNK+Ll_DNK+Pl_ESP+Ll_ESP+Pl_FIN+Ll_FIN+Pl_GBR+Ll_GBR+Pl_GRC+Ll_GRC+Pl_IRL+Ll_IRL+Pl_ISL+Ll_ISL+Pl_ISR+Ll_ISR+Pl_ITA+Ll_ITA+Pl_NLD+Ll_NLD+Pl_NOR+Ll_NOR+Pl_NZL+Ll_NZL+Pl_PRT+Ll_PRT+Pl_SWE+Ll_SWE
                     +strata(Esalt), data=de, method="efron")#regression
  CLC_ses_$loglik[2]
}
stopCluster(cl)#stop parallel computing clusters
tr=which.max(TRses)#38
trCLC_ses=tr#threshold
trCLC_ses


#ALTERNATIVE SPECIFICATIONS SPLINE THREE PERIODS:----

#optimization:
tic()
cores=detectCores()
cl=makeCluster(cores[1])
registerDoParallel(cl)#set parallel computing clusters
TRsess=foreach (i = 10000:16060, .packages='survival', .combine='c') %dopar% {#getting threshold (via max R2):
  t1=as.numeric(substr(i,2,3))
  t2=as.numeric(substr(i,4,5))
  ifelse(t1+t2<59, {
    tr1=t1
    tr2=t1+t2
    de$time1=pmin(de$time,tr1)#spline functions for coefficients the slopes in each segment
    de$time2=pmax(tr1,pmin(de$time,tr2))
    de$time3=pmax(tr2,de$time)
    CLC_sess_=clogit(Va ~ Pl:time1+Ll:time1+Pl:time2+Ll:time2+Pl:time3+Ll:time3
                            +Pl_AUS+Ll_AUS+Pl_AUT+Ll_AUT+Pl_CAN+Ll_CAN+Pl_DEU+Ll_DEU+Pl_DNK+Ll_DNK+Pl_ESP+Ll_ESP+Pl_FIN+Ll_FIN+Pl_GBR+Ll_GBR+Pl_GRC+Ll_GRC+Pl_IRL+Ll_IRL+Pl_ISL+Ll_ISL+Pl_ISR+Ll_ISR+Pl_ITA+Ll_ITA+Pl_NLD+Ll_NLD+Pl_NOR+Ll_NOR+Pl_NZL+Ll_NZL+Pl_PRT+Ll_PRT+Pl_SWE+Ll_SWE
                            +strata(Esalt), data=de, method="efron")#regression
    CLC_sess_$loglik[2]
  } , NA)
}
stopCluster(cl)#stop parallel computing clusters
toc()
tr=which.max(TRsess)
t1=as.numeric(substr((10000+(tr-1)),2,3))
t2=as.numeric(substr((10000+(tr-1)),4,5))
tr1=t1#9
tr2=t1+t2#37
tr1CLC_sess=tr1#
tr2CLC_sess=tr2#
tr1CLC_sess
tr2CLC_sess


#ALTERNATIVE SPECIFICATIONS SPLINE FOUR PERIODS:----

#optimization:
tic()
cores=detectCores()
cl=makeCluster(cores[1])
registerDoParallel(cl)#set parallel computing clusters
TRsesss=foreach (i = 1000000:1606060, .packages='survival', .combine='c') %dopar% {#getting threshold (via max R2):
  t1=as.numeric(substr(i,2,3))
  t2=as.numeric(substr(i,4,5))
  t3=as.numeric(substr(i,6,7))
  ifelse(t1+t2+t3<59, {
    tr1=t1
    tr2=t1+t2
    tr3=t1+t2+t3
    de$time1=pmin(de$time,tr1)#spline functions for coefficients the slopes in each segment
    de$time2=pmax(tr1,pmin(de$time,tr2))
    de$time3=pmax(tr2,pmin(de$time,tr3))
    de$time4=pmax(tr3,de$time)
    CLC_sesss_=clogit(Va ~ Pl:time1+Ll:time1+Pl:time2+Ll:time2+Pl:time3+Ll:time3+Pl:time4+Ll:time4
                         +Pl_AUS+Ll_AUS+Pl_AUT+Ll_AUT+Pl_CAN+Ll_CAN+Pl_DEU+Ll_DEU+Pl_DNK+Ll_DNK+Pl_ESP+Ll_ESP+Pl_FIN+Ll_FIN+Pl_GBR+Ll_GBR+Pl_GRC+Ll_GRC+Pl_IRL+Ll_IRL+Pl_ISL+Ll_ISL+Pl_ISR+Ll_ISR+Pl_ITA+Ll_ITA+Pl_NLD+Ll_NLD+Pl_NOR+Ll_NOR+Pl_NZL+Ll_NZL+Pl_PRT+Ll_PRT+Pl_SWE+Ll_SWE
                         +strata(Esalt), data=de, method="efron")#regression
    CLC_sesss_$loglik[2]
  } , NA)
}
stopCluster(cl)#stop parallel computing clusters
toc()
tr=which.max(TRsesss)#
t1=as.numeric(substr((1000000+(tr-1)),2,3))
t2=as.numeric(substr((1000000+(tr-1)),4,5))
t3=as.numeric(substr((1000000+(tr-1)),6,7))
tr1=t1#13
tr2=t1+t2#
tr3=t1+t2+t3#37
tr1CLC_sesss=tr1#
tr2CLC_sesss=tr2#
tr3CLC_sesss=tr3#
tr1CLC_sesss
tr2CLC_sesss
tr3CLC_sesss


#ALTERNATIVE SPECIFICATIONS SPLINE FIVE PERIODS:----

#optimization:
tic()
cores=detectCores()
cl=makeCluster(cores[1])
registerDoParallel(cl)#set parallel computing clusters
TRsessss=foreach (i = 100000000:160606060, .packages='survival', .combine='c') %dopar% {#getting threshold (via max R2):
  t1=as.numeric(substr(i,2,3))
  t2=as.numeric(substr(i,4,5))
  t3=as.numeric(substr(i,6,7))
  t4=as.numeric(substr(i,8,9))
  ifelse(t1+t2+t3+t4<59, {
    tr1=t1
    tr2=t1+t2
    tr3=t1+t2+t3
    tr4=t1+t2+t3+t4
    de$time1=pmin(de$time,tr1)#spline functions for coefficients the slopes in each segment
    de$time2=pmax(tr1,pmin(de$time,tr2))
    de$time3=pmax(tr2,pmin(de$time,tr3))
    de$time4=pmax(tr3,pmin(de$time,tr4))
    de$time5=pmax(tr4,de$time)
    CLC_sessss_=clogit(Va ~ Pl:time1+Ll:time1+Pl:time2+Ll:time2+Pl:time3+Ll:time3+Pl:time4+Ll:time4+Pl:time5+Ll:time5
                         +Pl_AUS+Ll_AUS+Pl_AUT+Ll_AUT+Pl_CAN+Ll_CAN+Pl_DEU+Ll_DEU+Pl_DNK+Ll_DNK+Pl_ESP+Ll_ESP+Pl_FIN+Ll_FIN+Pl_GBR+Ll_GBR+Pl_GRC+Ll_GRC+Pl_IRL+Ll_IRL+Pl_ISL+Ll_ISL+Pl_ISR+Ll_ISR+Pl_ITA+Ll_ITA+Pl_NLD+Ll_NLD+Pl_NOR+Ll_NOR+Pl_NZL+Ll_NZL+Pl_PRT+Ll_PRT+Pl_SWE+Ll_SWE
                         +strata(Esalt), data=de, method="efron")#regression
    CLC_sessss_$loglik[2]
  } , NA)
}
stopCluster(cl)#stop parallel computing clusters
toc()
tr=which.max(TRsessss)#
t1=as.numeric(substr((100000000+(tr-1)),2,3))
t2=as.numeric(substr((100000000+(tr-1)),4,5))
t3=as.numeric(substr((100000000+(tr-1)),6,7))
t4=as.numeric(substr((100000000+(tr-1)),8,9))
tr1=t1#8
tr2=t1+t2#13
tr3=t1+t2+t3#14
tr4=t1+t2+t3+t4#37
tr1CLC_sessss=tr1#
tr2CLC_sessss=tr2#
tr3CLC_sessss=tr3#
tr4CLC_sessss=tr4#
tr1CLC_sessss
tr2CLC_sessss
tr3CLC_sessss
tr4CLC_sessss

